library(conflicted)
library(tidyverse)
library(scales)
library(ggrepel)
library(gghighlight)
library(mapSpain)
library(ESdata)
conflicts_prefer(
dplyr::filter(),
dplyr::select(),
dplyr::lag(),
)Spain Occupational Accident Analysis (SODA)
Business Analytics Project
Introduction
This report covers the analysis of occupational accidents, also known as work accidents, in Spain
Data Exploration
Work Accident Data
load("data/ATR/ATR-I.1.3.RData")
atr |>
filter(sector == "Total",
ccaa == "ES") |>
ggplot(aes(date, accidents)) +
geom_line() +
geom_point()atr |>
filter(ccaa == "ES") |>
ggplot(aes(x = date, y = accidents, color = sector)) +
geom_line() +
geom_point(size=2) +
labs(x = "Year",
y = "Work accidents per 100k workers",
title = "Work accidents in Spain from 2009 to 2021")atr |>
filter(ccaa == "ES") |>
ggplot(aes(x = date, y = accidents, color = sector)) +
geom_line() +
geom_point() +
gghighlight(sector != "total", label_key = sector, use_group_by = FALSE) +
facet_wrap(~sector)atr |>
filter(sector == "Total",
year == 2021) |>
ggplot(aes(x = accidents, y = fct_reorder(ccaa_name, accidents))) +
geom_col() +
labs(x = "Work accidents per 100k workers",
y = NULL,
title = "Work accidents in 2021 per autonomous community")Add year over year change
atr <- atr |>
mutate(acc_yoy = (accidents - lag(accidents)) / lag(accidents),
.by = c(sector, ccaa))atr |>
filter(ccaa == "ES") |>
drop_na() |>
ggplot(aes(x = date, y = acc_yoy, color = sector)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
geom_line() +
geom_point() +
scale_y_continuous(labels = label_percent()) +
labs(x = "Year",
y = "Year over year change",
title = "Work accidents in Spain from 2009 to 2021")atr |>
filter(ccaa == "ES") |>
drop_na() |>
ggplot(aes(x = date, y = acc_yoy, fill = sector)) +
geom_col() +
facet_wrap(~sector) +
scale_y_continuous(labels = label_percent()) +
labs(x = "Year",
y = "Year over year change",
title = "Work accidents in Spain from 2009 to 2021") +
theme(legend.position = c(1, 0),
legend.justification = c(1, 0))Working population
work_pop <- epa_sector |>
filter(edad == "total",
sexo == "total") |>
select(-edad, -sexo)
work_pop |>
filter(region == "ES") |>
ggplot(aes(x = periodo, y = valor, color = sector)) +
geom_line() +
geom_point() +
labs(x = "Year", y = "100k workers",
title = "Active working population in Spain")Working population per sector
# Add relative work pop per sector
work_pop <- work_pop |>
mutate(rel = valor / valor[sector == "total"],
.by = c(periodo, region))
work_pop |>
# Add full AC names
left_join(ccaa_iso, by = join_by(region == iso)) |>
filter(periodo == "2023-01-31",
sector != "total",
!region %in% c("ES-ML", "ES-CE")) |>
ggplot(aes(rel, fct_reorder(nombres, rel), fill = sector)) +
geom_col() +
scale_x_continuous(labels = label_percent()) +
labs(x = "Percentage", y = NULL,
title = "Relative working population per sector")Non-EU-working population
work_noEU <- epa_nac |>
# Only view all sexes and working population
filter(sexo == "total",
dato == "act") |>
select(-sexo, -dato) |>
# Compute relative amount of workforce per nationality
mutate(rel = valor / valor[nac == "total"],
.by = c(region, periodo)) |>
# Only keep no-UE
filter(nac == "no-UE") |>
select(-valor, -nac) |>
rename(noEU_rel = rel)
work_noEU |> head()Plot non-EU workforce per AC:
work_noEU |>
filter(periodo == "2023-01-31") |>
# Add full AC names
left_join(ccaa_iso, by = join_by(region == iso)) |>
ggplot(aes(x = noEU_rel, y = fct_reorder(nombres, noEU_rel))) +
geom_col() +
scale_x_continuous(labels = label_percent()) +
labs(x = "Percentage",
y = NULL,
title = "Non-EU working population in 2023 per autonomous community")GDP
agr = c("vab_A", "vab_BE", "vab_F", "vab_GT", "pib")
agr_rename = c(
"agricultura" = "vab_A",
"industria" = "vab_BE",
"construccion" = "vab_F",
"servicios" = "vab_GT",
"total" = "pib"
)
gdp <- pib_pm_oferta |>
filter(agregado %in% agr,
tipo == "indice",
dato == "base",
ajuste == "ajustado") |>
mutate(month = month(periodo), year = year(periodo)) |>
filter(month == 6) |>
# Remove identical elements
distinct(across(periodo:dato), .keep_all = TRUE) |>
select(periodo, agregado, valor) |>
rename(date = periodo, sector = agregado, gdp = valor) |>
mutate(sector = fct_recode(sector, !!!agr_rename))
gdp |>
ggplot(aes(date, gdp, color = sector)) +
geom_line()Inflation
infl <- ipc_ccaa |>
# Only extract annual inflation and price index over all groups
filter(grupo == "general",
dato %in% c("anual", "indice")) |>
select(-grupo) |>
pivot_wider(names_from = dato, values_from = valor) |>
rename(infl_cpi = indice, infl_ann = anual) |>
# Annual inflation in decimal format
mutate(infl_ann = infl_ann / 100)
infl |> head()Map Plots
# Get Spanish ACs as simple features
ccaa <- esp_get_ccaa()
can_box <- esp_get_can_box()
atr_2021 <- atr |>
filter(year == 2021,
ccaa != "ES",
sector == "Total") |>
mutate(ccaa = as.character(ccaa))
# Join SF and work accidents data
ccaa_atr <- ccaa |>
inner_join(atr_2021, by = join_by(iso2.ccaa.code == ccaa))
ggplot(ccaa_atr) +
# Plot ACs
geom_sf(aes(fill = accidents),
color = "grey70",
linewidth = .3) +
# Plot canaries box
geom_sf(data = can_box, color = "grey70") +
# Plot labels with accident number
geom_label_repel(
aes(label = round(accidents), geometry = geometry),
stat = "sf_coordinates",
fill = alpha(c("white"), 0.5),
color = "black",
size = 3,
label.size = 0
) +
# Adjust color scale
scale_fill_distiller(palette = "Blues", direction = 1) +
theme_void() +
theme(legend.position = c(0.12, 0.6)) +
labs(x = NULL,
y = NULL,
title = "Work accidents in 2021 per autonomous community",
fill = "Accidents per\n100k workers")library(plotly)
library(sf)
ccaa <- esp_get_ccaa() |> st_cast("MULTIPOLYGON")
ccaa_atr <- ccaa |>
inner_join(atr_2021, by = join_by(iso2.ccaa.code == ccaa))
p <- ggplot(ccaa_atr) +
geom_sf(aes(fill = accidents,
text = paste0(ccaa_name, ":\n", round(accidents))),
color = "grey70",
linewidth = .3) +
geom_sf(data = can_box, color = "grey70") +
scale_fill_distiller(palette = "Blues", direction = 1) +
theme_void() +
labs(x = NULL,
y = NULL,
title = "Work accidents in 2021 per autonomous community",
fill = "Accidents per\n100k workers")
ggplotly(p, tooltip = "text") |> style(hoveron = "fill")